Overview

This notebook performs differential expression analysis of human endogenous retroviruses (L1s) across cell-type clusters and brain regions from ASAP PMDBS snRNA-seq data.

Specifically, it: * Defines helper functions for extracting significant genes and visualizing expression (custom getSignName, getAverage, meanPlot_cus, and box_pseudobulk). * Loads TE (L1s) count data generated from trusTEr outputs and organizes them by region and cluster. * Builds DESeq2 models to compare PD vs. control expression across multiple clusters. * Produces summary tables of DE results, mean expression plots, and box plots for selected example L1s. * Identifies overlaps of upregulated L1s across cell types and regions using UpSet plots. * Compares in vivo upregulated L1s in microglia with in vitro stimulation experiments.

Set up

Loading data and needed libraries

library(UpSetR)
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     shift
## The following object is masked from 'package:GenomicRanges':
## 
##     shift
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(grid)
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::between()      masks data.table::between()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks data.table::first(), S4Vectors::first()
## ✖ lubridate::hour()     masks data.table::hour()
## ✖ lubridate::isoweek()  masks data.table::isoweek()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ dplyr::last()         masks data.table::last()
## ✖ lubridate::mday()     masks data.table::mday()
## ✖ lubridate::minute()   masks data.table::minute()
## ✖ lubridate::month()    masks data.table::month()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ lubridate::quarter()  masks data.table::quarter()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks data.table::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice()        masks IRanges::slice()
## ✖ purrr::transpose()    masks data.table::transpose()
## ✖ lubridate::wday()     masks data.table::wday()
## ✖ lubridate::week()     masks data.table::week()
## ✖ lubridate::yday()     masks data.table::yday()
## ✖ lubridate::year()     masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)

load("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/processing/preprocessed_TE_data.RData")
source("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/processing/TE_DEA_functions.R")
source("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/processing/TE_heatmap_functions.R")
# Load samplesheet
samplesheet <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/ASAP_samplesheet.xlsx")
# I want to split TE_data$te_counts per region, but first I need to categorize the columns by region
clusters <- colnames(TE_data$te_counts)[7:ncol(TE_data$te_counts)]
clusters_per_region <- data.frame(cluster = clusters,
                                  region = str_extract(clusters, "SN|PUT|PFC|AMY")) %>%
  group_by(region) %>%
  summarise(clusters = list(cluster)) %>%
  ungroup()

expressed_L1s_bulkrna <- fread("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_bulkRNAseq/results/tables/expressed_FL_L1HS_L1PA3_expressed.tab", data.table = F, header = F)$V1
# Now we loop through the regions and select only the region's columns
te_counts <- list()
for (i in 1:nrow(clusters_per_region)) {
  region <- clusters_per_region$region[i]
  cluster_cols <- clusters_per_region$clusters[[i]] # these are vectors of column names
  # Subset counts dataframe by these columns and keep Geneid
  te_counts[[region]] <- TE_data$te_counts %>%
    select(Geneid, all_of(cluster_cols)) %>%
    filter(Geneid %in% expressed_L1s_bulkrna) # Expressed in bulk RNA
}

head(te_counts$SN)

2. Differential Expression Analysis (DEA) with DESeq2

te_dds_list <- list()
te_exp_list <- list()
te_res_list <- list()
te_res_list_df <- list()
regions <- c("SN", "PUT", "PFC", "AMY")
for(cluster in as.character(0:7)){
  for(region in regions){
    print(cluster)
    print(region)
    samplesheet_list <- split(samplesheet, f = samplesheet$Region)
    rownames(samplesheet_list[[region]]) <- samplesheet_list[[region]]$Sample
    samples_cluster <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
    samples_cluster <- samples_cluster[which(samples_cluster %in% colnames(te_counts[[region]]))]
    rownames(samplesheet_list[[region]]) <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
    # Expressed in snRNA
    te_counts_expressed <- te_counts[[region]][which(rowSums(te_counts[[region]][,samples_cluster]) > 0),]
    print(paste0("Cluster ", cluster, ", region ", region, " expresses ", nrow(te_counts_expressed)))
    
    region_cluster <- paste0(region, "_", cluster)
    te_dds_list[[region_cluster]] <- DESeqDataSetFromMatrix((te_counts_expressed[,samples_cluster]+1), samplesheet_list[[region]][samples_cluster,], design =  ~ Dx)
    te_dds_list[[region_cluster]]$Dx <- relevel(te_dds_list[[region_cluster]]$Dx, "Ctl")
    error_handle <- tryCatch({
      te_dds_list[[region_cluster]] <- DESeq(te_dds_list[[region_cluster]])
    }, error = function(e) {
      print(paste("Something happened with ", region, cluster))
      return(1)
    })
    if(is.numeric(error_handle)){
      if(error_handle == 1){
        te_dds_list[[region_cluster]] <- NA
        te_res_list[[region_cluster]] <- NA
        te_res_list_df[[region_cluster]] <- NA
        te_exp_list[[region_cluster]] <- NA
      }
    }else{
      te_exp_list[[region_cluster]] <- getAverage(te_dds_list[[region_cluster]])
      te_res_list[[region_cluster]] <- lfcShrink(te_dds_list[[region_cluster]], "Dx_PD_vs_Ctl")
      te_res_list_df[[region_cluster]] <- as.data.frame(te_res_list[[region_cluster]])
      te_res_list_df[[region_cluster]]$L1s_id <- rownames(te_res_list_df[[region_cluster]])
    }
  }
}
## [1] "0"
## [1] "SN"
## [1] "Cluster 0, region SN expresses 98"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "0"
## [1] "PUT"
## [1] "Cluster 0, region PUT expresses 518"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 25 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "0"
## [1] "PFC"
## [1] "Cluster 0, region PFC expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 11 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "0"
## [1] "AMY"
## [1] "Cluster 0, region AMY expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 16 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "1"
## [1] "SN"
## [1] "Cluster 1, region SN expresses 458"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 26 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## [1] "1"
## [1] "PUT"
## [1] "Cluster 1, region PUT expresses 515"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 25 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "1"
## [1] "PFC"
## [1] "Cluster 1, region PFC expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 10 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## [1] "1"
## [1] "AMY"
## [1] "Cluster 1, region AMY expresses 518"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 11 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "SN"
## [1] "Cluster 2, region SN expresses 517"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 21 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "PUT"
## [1] "Cluster 2, region PUT expresses 519"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 15 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "PFC"
## [1] "Cluster 2, region PFC expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 14 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "AMY"
## [1] "Cluster 2, region AMY expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 17 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "SN"
## [1] "Cluster 3, region SN expresses 519"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 23 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "PUT"
## [1] "Cluster 3, region PUT expresses 514"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 24 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "PFC"
## [1] "Cluster 3, region PFC expresses 513"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 10 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "AMY"
## [1] "Cluster 3, region AMY expresses 519"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 28 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "SN"
## [1] "Cluster 4, region SN expresses 487"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 14 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "PUT"
## [1] "Cluster 4, region PUT expresses 476"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 20 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "PFC"
## [1] "Cluster 4, region PFC expresses 442"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 6 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "AMY"
## [1] "Cluster 4, region AMY expresses 490"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 9 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "SN"
## [1] "Cluster 5, region SN expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 18 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "PUT"
## [1] "Cluster 5, region PUT expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 15 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "PFC"
## [1] "Cluster 5, region PFC expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 23 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "AMY"
## [1] "Cluster 5, region AMY expresses 520"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 36 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## Warning in nbinomGLM(x = x, Y = YNZ, size = size, weights = weightsNZ, offset =
## offsetNZ, : the line search routine failed, unable to sufficiently decrease the
## function value
## [1] "6"
## [1] "SN"
## [1] "Cluster 6, region SN expresses 324"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 15 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "PUT"
## [1] "Cluster 6, region PUT expresses 226"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 5 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "PFC"
## [1] "Cluster 6, region PFC expresses 338"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 16 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "AMY"
## [1] "Cluster 6, region AMY expresses 263"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 13 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "SN"
## [1] "Cluster 7, region SN expresses 212"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 3 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "PUT"
## [1] "Cluster 7, region PUT expresses 91"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 5 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "PFC"
## [1] "Cluster 7, region PFC expresses 62"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## Warning in lfproc(x, y, weights = weights, cens = cens, base = base, geth =
## geth, : Estimated rdf < 1.0; not estimating variance
## final dispersion estimates
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "AMY"
## [1] "Cluster 7, region AMY expresses 102"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 5 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
openxlsx::write.xlsx(te_res_list_df, paste("/Volumes/MyPassport/ASAP/data/ASAP_PMDBS_snRNAseq/results/tables/L1s_PDvsCtl_res.xlsx", sep=""))

3. Mean expression scatter plots

mean_plots_list <- list()
te_res_expressed_list_df <- list()

for(region_cluster in names(te_exp_list)){
    if(!all(is.na(te_res_list[[region_cluster]]))){
      region <- sapply(str_split(region_cluster, "_"), `[[`, 1)
      effect <- unique(samplesheet_list[[region]][which(samplesheet_list[[region]]$Dx != "Ctl"),"Dx"])
      
      colors_list <- pick_colors_meanplot(te_res_list[[region_cluster]])
      mean_plots_list[[region_cluster]] <- meanPlot_cus(exp = te_exp_list[[region_cluster]]$Mean,
                                                        test = te_res_list[[region_cluster]], 
                                                        col1=colors_list$col1, col2=colors_list$col2, col3=colors_list$col3,
                                                        c1 = effect, c2="Ctl", p = 0.05, l=1, ttl = region_cluster, repel = F)
    }
}

# Neurons (cluster 0): 
# Very little number of nuclei in SN in this cluster - ignored
mean_plots_list$PUT_0 
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PFC_0 
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$AMY_0 
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Neurons (cluster 1)
mean_plots_list$PUT_1 
## Warning: Removed 515 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PFC_1 
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$AMY_1 
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Astrocytes
mean_plots_list$SN_2 
## Warning: Removed 517 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PUT_2
## Warning: Removed 519 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PFC_2 
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$AMY_2
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).

# OPCs
mean_plots_list$SN_3
## Warning: Removed 519 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PUT_3
## Warning: Removed 514 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PFC_3 
## Warning: Removed 513 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$AMY_3
## Warning: Removed 519 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Microglia
mean_plots_list$SN_4
## Warning: Removed 487 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PUT_4
## Warning: Removed 476 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PFC_4 
## Warning: Removed 442 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$AMY_4 
## Warning: Removed 490 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Oligodendrocytes
mean_plots_list$SN_5
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PUT_5
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$PFC_5
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).

mean_plots_list$AMY_5
## Warning: Removed 520 rows containing missing values or values outside the scale range
## (`geom_point()`).

# VLMCs and Tcells skipped as their clusters are too small 

4. Box plots

L1s_counts_norm <- list()
for(region_cluster in names(te_dds_list)){
  L1s_counts_norm[[region_cluster]] <- list()
  error_handle <- tryCatch({
      L1s_counts_norm[[region_cluster]] <- counts(te_dds_list[[region_cluster]], normalize = T)
    }, error = function(e) {
      print(paste("Something happened with ", region_cluster))
      return(1)
    })
}

samplesheet_list[["SN"]]$sample_cluster <- paste(samplesheet_list$SN$Sample, "4", sep="_")
samplesheet_list[["PUT"]]$sample_cluster <- paste(samplesheet_list$PUT$Sample, "4", sep="_")

plot_list_sn <- list()
plot_list_put <- list()
for (L1s in te_res_list_df$SN_4[which(te_res_list_df$SN_4$padj < 0.05 & te_res_list_df$SN_4$log2FoldChange > 0),"TE_id"]){
  plot_list_sn[[L1s]] <- box_pseudobulk(df = L1s_counts_norm$SN_4,res =  te_res_list_df$SN_4,gene =  L1s, log2=F)
  plot_list_put[[L1s]] <- box_pseudobulk(df = L1s_counts_norm$PUT_4, te_res_list_df$PUT_4, L1s, log2=F)
}

box_pseudobulk(df = L1s_counts_norm$SN_4, te_res_list_df$SN_4, "L1PA2_dup1272", log2=T)
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

box_pseudobulk(df = L1s_counts_norm$PUT_4, te_res_list_df$PUT_4, "L1PA2_dup1272", log2=T)

box_pseudobulk(df = L1s_counts_norm$PFC_4, te_res_list_df$PFC_4, "L1PA2_dup1272", log2=T)

box_pseudobulk(df = L1s_counts_norm$AMY_4, te_res_list_df$AMY_4, "L1PA2_dup1272", log2=T)

box_pseudobulk(df = L1s_counts_norm$SN_4, te_res_list_df$SN_4, "L1PA3_dup8310", log2=T)

box_pseudobulk(df = L1s_counts_norm$PUT_4, te_res_list_df$PUT_4, "L1PA3_dup8310", log2=T)

box_pseudobulk(df = L1s_counts_norm$PFC_4, te_res_list_df$PFC_4, "L1PA3_dup8310", log2=T)

box_pseudobulk(df = L1s_counts_norm$AMY_4, te_res_list_df$AMY_4, "L1PA3_dup8310", log2=T)

box_pseudobulk(df = L1s_counts_norm$SN_4, te_res_list_df$SN_4, "L1PA3_dup6434", log2=T)

box_pseudobulk(df = L1s_counts_norm$PUT_4, te_res_list_df$PUT_4, "L1PA3_dup6434", log2=T)

box_pseudobulk(df = L1s_counts_norm$PFC_4, te_res_list_df$PFC_4, "L1PA3_dup6434", log2=T)

box_pseudobulk(df = L1s_counts_norm$AMY_4, te_res_list_df$AMY_4, "L1PA3_dup6434", log2=T)

5. Intersection of upregulated L1s

get_significant_L1s <- function(df){
  det <- df[which(df$padj < 0.05 & df$log2FoldChange > 1), "TE_id"]
  if (length(det) > 0){
    return(det)  
  }else{
    return(NA)
  }
}

# Astrocytes
upset(fromList(sapply(list("SN_2" = te_res_list_df$SN_2,
                           "PUT_2" = te_res_list_df$PUT_2,
                           "PFC_2" = te_res_list_df$PFC_2,
                           "AMY_2" = te_res_list_df$AMY_2), get_significant_L1s)))
grid.text("Astrocytes", vjust = -17, hjust= -3)

# Microglia
upset(fromList(sapply(list("SN_4" = te_res_list_df$SN_4,
                           "PUT_4" = te_res_list_df$PUT_4,
                           "PFC_4" = te_res_list_df$PFC_4,
                           "AMY_4" = te_res_list_df$AMY_4), get_significant_L1s)))
grid.text("Microglia", vjust = -17, hjust= -3)

# OPCs
upset(fromList(sapply(list("SN_3" = te_res_list_df$SN_3,
                           "PUT_3" = te_res_list_df$PUT_3,
                           "PFC_3" = te_res_list_df$PFC_3,
                           "AMY_3" = te_res_list_df$AMY_3), get_significant_L1s)))
grid.text("OPCs", vjust = -17, hjust= -3)

# Oligos
upset(fromList(sapply(list("SN_5" = te_res_list_df$SN_5,
                           "PUT_5" = te_res_list_df$PUT_5,
                           "PFC_5" = te_res_list_df$PFC_5,
                           "AMY_5" = te_res_list_df$AMY_5), get_significant_L1s)))
grid.text("Oligodendrocytes", vjust = -17, hjust= 0)

# Neurons cluster 1
upset(fromList(sapply(list("PUT_1" = te_res_list_df$PUT_1,
                           "PFC_1" = te_res_list_df$PFC_1,
                           "AMY_1" = te_res_list_df$AMY_1), get_significant_L1s)))
grid.text("Neurons cluster 1", vjust = -17, hjust= -3)

# All in PUT
upset(fromList(list("OPCs" = get_significant_L1s(te_res_list_df$PUT_3),
                    # "Oligos" = get_significant_L1s(te_res_list_df$PUT_5),
                    "Microglia" = get_significant_L1s(te_res_list_df$PUT_4),
                    # "VLMCs" = get_significant_L1s(te_res_list_df$PUT_6),
                    # "T cells" = get_significant_L1s(te_res_list_df$PUT_7),
               # "Exc Neurons" = get_significant_L1s(te_res_list_df$PUT_0),
               # "Inh Neurons" = get_significant_L1s(te_res_list_df$PUT_1),
               "Astrocytes" = get_significant_L1s(te_res_list_df$PUT_2))), sets = c("OPCs", "Microglia", "Astrocytes"), nintersects = 100, nsets = 100, mainbar.y.max = 30) 
grid.text("Putamen", vjust = -17, hjust= 0)

# All in AMY
upset(fromList(list("Exc Neurons" = get_significant_L1s(te_res_list_df$AMY_0),
                    "Inh Neurons" = get_significant_L1s(te_res_list_df$AMY_1),
                    "Astrocytes" = get_significant_L1s(te_res_list_df$AMY_2),
                    # "OPCs" = get_significant_L1s(te_res_list_df$AMY_3),
                    "Oligos" = get_significant_L1s(te_res_list_df$AMY_5)
                    # "Microglia" = get_significant_L1s(te_res_list_df$AMY_4),
                    # "VLMCs" = get_significant_L1s(te_res_list_df$AMY_6),
                    # "T cells" = get_significant_L1s(te_res_list_df$AMY_7)
                    )), sets = c("Exc Neurons", "Inh Neurons", "Astrocytes", "Oligos"), nintersects = 100, nsets = 100, mainbar.y.max = 30) 
grid.text("Amygdala", vjust = -17, hjust= 0)

upset(fromList(list("SN: OPCs" = get_significant_L1s(te_res_list_df$SN_3),
     "SN: Oligos" = get_significant_L1s(te_res_list_df$SN_5),
     "SN: Microglia" = get_significant_L1s(te_res_list_df$SN_4),
     "SN: VLMCs" = get_significant_L1s(te_res_list_df$SN_6),
     "SN: T cells" = get_significant_L1s(te_res_list_df$SN_7),
     "SN: Neurons cluster 0" = get_significant_L1s(te_res_list_df$SN_0),
     "SN: Neurons cluster 1" = get_significant_L1s(te_res_list_df$SN_1),
     "SN: Astrocytes" = get_significant_L1s(te_res_list_df$SN_2),
     
     "PUT: OPCs" = get_significant_L1s(te_res_list_df$PUT_3),
     "PUT: Oligos" = get_significant_L1s(te_res_list_df$PUT_5),
     "PUT: Microglia" = get_significant_L1s(te_res_list_df$PUT_4),
     "PUT: VLMCs" = get_significant_L1s(te_res_list_df$PUT_6),
     "PUT: T cells" = get_significant_L1s(te_res_list_df$PUT_7),
     "PUT: Neurons cluster 0" = get_significant_L1s(te_res_list_df$PUT_0),
     "PUT: Neurons cluster 1" = get_significant_L1s(te_res_list_df$PUT_1),
     "PUT: Astrocytes" = get_significant_L1s(te_res_list_df$PUT_2),
     
     "PFC: OPCs" = get_significant_L1s(te_res_list_df$PFC_3),
     "PFC: Oligos" = get_significant_L1s(te_res_list_df$PFC_5),
     "PFC: Microglia" = get_significant_L1s(te_res_list_df$PFC_4),
     "PFC: VLMCs" = get_significant_L1s(te_res_list_df$PFC_6),
     "PFC: T cells" = get_significant_L1s(te_res_list_df$PFC_7),
     "PFC: Neurons cluster 0" = get_significant_L1s(te_res_list_df$PFC_0),
     "PFC: Neurons cluster 1" = get_significant_L1s(te_res_list_df$PFC_1),
     "PFC: Astrocytes" = get_significant_L1s(te_res_list_df$PFC_2),
     
     "AMY: OPCs" = get_significant_L1s(te_res_list_df$AMY_3),
     "AMY: Oligos" = get_significant_L1s(te_res_list_df$AMY_5),
     "AMY: Microglia" = get_significant_L1s(te_res_list_df$AMY_4),
     "AMY: VLMCs" = get_significant_L1s(te_res_list_df$AMY_6),
     "AMY: T cells" = get_significant_L1s(te_res_list_df$AMY_7),
     "AMY: Neurons cluster 0" = get_significant_L1s(te_res_list_df$AMY_0),
     "AMY: Neurons cluster 1" = get_significant_L1s(te_res_list_df$AMY_1),
     "AMY: Astrocytes" = get_significant_L1s(te_res_list_df$AMY_2))), nintersects = 100, nsets = 100, mainbar.y.max = 30)